\contentsline {chapter}{Summary}{vi}{dummy.2}
\contentsline {chapter}{Samenvatting}{vii}{dummy.3}
\contentsline {chapter}{Acknowledgements}{viii}{dummy.4}
\vspace {1em}
\contentsline {chapter}{List of Figures}{xiii}{dummy.6}
\contentsline {chapter}{List of Tables}{xv}{dummy.8}
\vspace {2em}
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.10}
\contentsline {section}{\numberline {1.1}A Brief Overview of Instance Matching}{3}{section.12}
\contentsline {subsection}{\numberline {1.1.1}What is instance matching?}{3}{subsection.13}
\contentsline {subsection}{\numberline {1.1.2}How does it work?}{3}{subsection.24}
\contentsline {subsection}{\numberline {1.1.3}Why is it challenging to do instance matching on Linked Data?}{4}{subsection.25}
\contentsline {section}{\numberline {1.2}Thesis Scope and Outline}{4}{section.26}
\contentsline {subsection}{\numberline {1.2.1}Towards Self-Linking Linked Data}{4}{subsection.27}
\contentsline {subsection}{\numberline {1.2.2}SERIMI: Class-based Matching for Instance Matching Across Heterogeneous Datasets}{4}{subsection.28}
\contentsline {subsection}{\numberline {1.2.3}Efficient and Effective On-the-fly Candidate Selection over SPARQL Endpoints}{5}{subsection.29}
\contentsline {subsection}{\numberline {1.2.4}Learning Edit-Distance Based String Transformation Rules From Examples}{5}{subsection.30}
\contentsline {subsection}{\numberline {1.2.5}Exercises on Knowledge Base Acceleration}{6}{subsection.31}
\contentsline {subsection}{\numberline {1.2.6}Conclusions}{7}{subsection.32}
\contentsline {chapter}{\numberline {2}Towards Self-Linking Linked Data}{9}{chapter.33}
\contentsline {section}{\numberline {2.1}Introduction}{9}{section.34}
\contentsline {section}{\numberline {2.2}A General Architecture}{11}{section.40}
\contentsline {subsection}{\numberline {2.2.1}Building a Self-Linking Linked Data}{11}{subsection.41}
\contentsline {subsection}{\numberline {2.2.2}Interlinking as a Query Problem}{12}{subsection.42}
\contentsline {subsection}{\numberline {2.2.3}Self-linking Policies}{14}{subsection.44}
\contentsline {subsection}{\numberline {2.2.4}SPARQL Extensions to Support Self-linking}{14}{subsection.45}
\contentsline {subsection}{\numberline {2.2.5}Standardization of Interlinking Algorithms}{15}{subsection.48}
\contentsline {section}{\numberline {2.3}Proof of Concept}{15}{section.49}
\contentsline {section}{\numberline {2.4}Future Work}{16}{section.53}
\contentsline {section}{\numberline {2.5}Conclusion}{17}{section.54}
\contentsline {chapter}{\numberline {3}SERIMI: Class-based Matching for Instance Matching Across Heterogeneous Datasets}{19}{chapter.55}
\contentsline {section}{\numberline {3.1}Introduction}{20}{section.56}
\contentsline {section}{\numberline {3.2}Preliminary Definitions}{23}{section.62}
\contentsline {section}{\numberline {3.3}Overview of the Approach}{24}{section.67}
\contentsline {section}{\numberline {3.4}Class-Based Matching}{27}{section.71}
\contentsline {subsection}{\numberline {3.4.1}Formal Definition}{27}{subsection.73}
\contentsline {section}{\numberline {3.5}Class-based Matching: A Solution}{29}{section.78}
\contentsline {subsection}{\numberline {3.5.1}Basic Solution}{29}{subsection.79}
\contentsline {subsection}{\numberline {3.5.2}Reducing the Number of Comparisons}{33}{subsection.108}
\contentsline {subsection}{\numberline {3.5.3}Selecting the Threshold}{34}{subsection.129}
\contentsline {section}{\numberline {3.6}Evaluation}{36}{section.154}
\contentsline {subsection}{\numberline {3.6.1}Task Analysis}{39}{subsection.160}
\contentsline {subsection}{\numberline {3.6.2}SERIMI Configurations}{44}{subsection.169}
\contentsline {subsection}{\numberline {3.6.3}SERIMI vs. Alternative Approaches}{49}{subsection.173}
\contentsline {section}{\numberline {3.7}Related Work}{51}{section.176}
\contentsline {section}{\numberline {3.8}Conclusion}{52}{section.177}
\contentsline {chapter}{\numberline {4}Efficient and Effective On-the-fly Candidate Selection over SPARQL Endpoints}{53}{chapter.178}
\contentsline {section}{\numberline {4.1}Introduction}{54}{section.179}
\contentsline {section}{\numberline {4.2}Overview}{57}{section.204}
\contentsline {subsection}{\numberline {4.2.1}Problem - Find Candidate Matches}{57}{subsection.207}
\contentsline {subsection}{\numberline {4.2.2}Existing Solutions}{59}{subsection.210}
\contentsline {subsection}{\numberline {4.2.3}Sonda}{59}{subsection.211}
\contentsline {section}{\numberline {4.3}Learning Queries}{61}{section.213}
\contentsline {subsection}{\numberline {4.3.1}Finding Comparable Key Pairs}{62}{subsection.233}
\contentsline {subsection}{\numberline {4.3.2}Constructing Attribute Components}{63}{subsection.235}
\contentsline {subsection}{\numberline {4.3.3}Learning Class Components}{64}{subsection.251}
\contentsline {section}{\numberline {4.4}Executing Optimal Queries}{65}{section.252}
\contentsline {subsection}{\numberline {4.4.1}Estimating Metrics for Query Optimality}{66}{subsection.254}
\contentsline {subsection}{\numberline {4.4.2}Optimal Queries for One Instance}{67}{subsection.259}
\contentsline {subsection}{\numberline {4.4.3}Optimization Process for All Instances}{69}{subsection.263}
\contentsline {section}{\numberline {4.5}Evaluation}{72}{section.319}
\contentsline {subsection}{\numberline {4.5.1}Candidate Selection Results}{77}{subsection.324}
\contentsline {subsection}{\numberline {4.5.2}Instance Matching Results}{81}{subsection.328}
\contentsline {subsection}{\numberline {4.5.3}Utility of the Approach}{82}{subsection.331}
\contentsline {section}{\numberline {4.6}Related Work}{83}{section.334}
\contentsline {section}{\numberline {4.7}Conclusions}{84}{section.335}
\contentsline {chapter}{\numberline {5}Learning Edit-Distance Based String Transformation Rules From Examples}{85}{chapter.336}
\contentsline {section}{\numberline {5.1}Introduction}{86}{section.337}
\contentsline {subsection}{\numberline {5.1.1}Overview and Contributions}{88}{subsection.341}
\contentsline {section}{\numberline {5.2}Learning Transformations}{89}{section.342}
\contentsline {subsection}{\numberline {5.2.1}Preliminary Definitions}{90}{subsection.343}
\contentsline {subsection}{\numberline {5.2.2}Transformation Rules}{91}{subsection.351}
\contentsline {subsection}{\numberline {5.2.3}Generalization of Transformation Rules}{92}{subsection.353}
\contentsline {subsection}{\numberline {5.2.4}Learning Problem}{95}{subsection.359}
\contentsline {section}{\numberline {5.3}Rule Learner Algorithm}{96}{section.365}
\contentsline {subsection}{\numberline {5.3.1}Rule Learning}{96}{subsection.366}
\contentsline {subsection}{\numberline {5.3.2}Relative Position Algorithm}{96}{subsection.367}
\contentsline {subsection}{\numberline {5.3.3}Permutation Rule Learner}{98}{subsection.393}
\contentsline {subsection}{\numberline {5.3.4}Insertions and Deletions Rule Learner}{101}{subsection.421}
\contentsline {subsection}{\numberline {5.3.5}Update Rule Learner}{102}{subsection.424}
\contentsline {subsection}{\numberline {5.3.6}Discussion}{103}{subsection.425}
\contentsline {section}{\numberline {5.4}Rule Selector Method}{103}{section.426}
\contentsline {section}{\numberline {5.5}Evaluation}{104}{section.427}
\contentsline {subsection}{\numberline {5.5.1}Data}{104}{subsection.428}
\contentsline {subsection}{\numberline {5.5.2}Evaluation Metric}{106}{subsection.431}
\contentsline {subsection}{\numberline {5.5.3}Rule Coverage}{107}{subsection.433}
\contentsline {subsection}{\numberline {5.5.4}Rule Selector Accuracy}{108}{subsection.437}
\contentsline {subsection}{\numberline {5.5.5}Runtime Cost}{110}{subsection.443}
\contentsline {subsection}{\numberline {5.5.6}Performance Comparison}{111}{subsection.445}
\contentsline {section}{\numberline {5.6}Related Work}{113}{section.448}
\contentsline {section}{\numberline {5.7}Conclusions}{114}{section.449}
\contentsline {chapter}{\numberline {6}Exercises on Knowledge Base Acceleration}{117}{chapter.450}
\contentsline {section}{\numberline {6.1}Introduction}{118}{section.451}
\contentsline {section}{\numberline {6.2}TREC-KBA Task Overview}{119}{section.452}
\contentsline {subsection}{\numberline {6.2.1}Data Overview}{119}{subsection.453}
\contentsline {subsection}{\numberline {6.2.2}TREC-KBA Baseline}{120}{subsection.456}
\contentsline {section}{\numberline {6.3}Approaches}{120}{section.458}
\contentsline {subsection}{\numberline {6.3.1}Entity Representation}{120}{subsection.459}
\contentsline {subsection}{\numberline {6.3.2}Prefix-Suffix Learning Approach}{121}{subsection.474}
\contentsline {subsection}{\numberline {6.3.3}Disambiguator Approach}{122}{subsection.493}
\contentsline {subsection}{\numberline {6.3.4}Language Model Approach}{125}{subsection.563}
\contentsline {section}{\numberline {6.4}Evaluations and Discussions}{126}{section.597}
\contentsline {section}{\numberline {6.5}Conclusion}{128}{section.599}
\contentsline {chapter}{\numberline {7}Conclusions}{129}{chapter.600}
\contentsline {section}{\numberline {7.1}Research Questions}{129}{section.601}
\contentsline {subsection}{\numberline {7.1.1}Towards Self-Linking Linked Data}{129}{subsection.602}
\contentsline {subsection}{\numberline {7.1.2}SERIMI: Class-based Matching for Instance Matching Across Heterogeneous Datasets}{129}{subsection.603}
\contentsline {subsection}{\numberline {7.1.3}Efficient and Effective On-the-fly Candidate Selection over SPARQL Endpoints}{130}{subsection.604}
\contentsline {subsection}{\numberline {7.1.4}Learning Edit-Distance Based String Transformation Rules From Examples}{131}{subsection.607}
\contentsline {subsection}{\numberline {7.1.5}Exercises on Knowledge Base Acceleration}{132}{subsection.608}
\contentsline {section}{\numberline {7.2}Future Research}{133}{section.609}
\vspace {2em}
\contentsline {chapter}{\numberline {A}Jaccard Vs. FSSim}{135}{appendix.610}
\vspace {2em}
\contentsline {chapter}{Bibliography}{137}{dummy.613}
\contentsline {chapter}{SIKS Dissertations}{147}{appendix*.615}
